home *** CD-ROM | disk | FTP | other *** search
/ Personal Computer World 2009 February / PCWFEB09.iso / Software / Resources / Chat & Communication / Digsby build 37 / digsby_setup.exe / lib / tokenize.pyo (.txt) < prev    next >
Python Compiled Bytecode  |  2008-10-13  |  8KB  |  296 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyo (Python 2.5)
  3.  
  4. __author__ = 'Ka-Ping Yee <ping@lfw.org>'
  5. __credits__ = 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
  6. import string
  7. import re
  8. from token import *
  9. import token
  10. __all__ = _[1] + [
  11.     'COMMENT',
  12.     'tokenize',
  13.     'generate_tokens',
  14.     'NL',
  15.     'untokenize']
  16. del x
  17. del token
  18. COMMENT = N_TOKENS
  19. tok_name[COMMENT] = 'COMMENT'
  20. NL = N_TOKENS + 1
  21. tok_name[NL] = 'NL'
  22. N_TOKENS += 2
  23.  
  24. def group(*choices):
  25.     return '(' + '|'.join(choices) + ')'
  26.  
  27.  
  28. def any(*choices):
  29.     return group(*choices) + '*'
  30.  
  31.  
  32. def maybe(*choices):
  33.     return group(*choices) + '?'
  34.  
# Regular-expression fragments for the Python 2.x lexical grammar.  All
# patterns are plain (non-raw) string literals, so each regex backslash is
# written doubled.  Where alternatives overlap, the longer one is listed
# first because the re module uses leftmost-then-longest-of-the-alternatives
# matching.
Whitespace = '[ \\f\\t]*'
Comment = '#[^\\r\\n]*'
# Ignorable text: whitespace, any number of backslash-newline line
# continuations, then an optional trailing comment.
Ignore = Whitespace + any('\\\\\\r?\\n' + Whitespace) + maybe(Comment)
Name = '[a-zA-Z_]\\w*'

# Integer literals, each with an optional long suffix (l/L).
Hexnumber = '0[xX][\\da-fA-F]*[lL]?'
Octnumber = '0[0-7]*[lL]?'
Decnumber = '[1-9]\\d*[lL]?'
Intnumber = group(Hexnumber, Octnumber, Decnumber)
# Floating-point and imaginary literals.
Exponent = '[eE][-+]?\\d+'
Pointfloat = group('\\d+\\.\\d*', '\\.\\d+') + maybe(Exponent)
Expfloat = '\\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group('\\d+[jJ]', Floatnumber + '[jJ]')
# Imaginary before float before int: the most specific form must win.
Number = group(Imagnumber, Floatnumber, Intnumber)

# Tail ends of strings: text matched *after* the opening quote, up to and
# including the closing quote, allowing backslash escapes.
Single = "[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"                        # ...'
Double = '[^"\\\\]*(?:\\\\.[^"\\\\]*)*"'                        # ..."
Single3 = "[^'\\\\]*(?:(?:\\\\.|'(?!''))[^'\\\\]*)*'''"         # ...'''
Double3 = '[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""'         # ..."""
# Opening delimiter of a triple-quoted string, with optional u/r prefixes.
Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
# A complete single-line string literal (no embedded bare newline).
String = group("[uU]?[rR]?'[^\\n'\\\\]*(?:\\\\.[^\\n'\\\\]*)*'", '[uU]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*"')

# Operators, brackets, and punctuation.  Longer operators come first so
# e.g. '**=' is not split into '*' '*='.
Operator = group('\\*\\*=?', '>>=?', '<<=?', '<>', '!=', '//=?', '[+\\-*/%&|^=<>]=?', '~')
Bracket = '[][(){}]'
Special = group('\\r?\\n', '[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of a string that may continue onto the next line:
# ends either at the closing quote or at a backslash-newline.
ContStr = group("[uU]?[rR]?'[^\\n'\\\\]*(?:\\\\.[^\\n'\\\\]*)*" + group("'", '\\\\\\r?\\n'), '[uU]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*' + group('"', '\\\\\\r?\\n'))
PseudoExtras = group('\\\\\\r?\\n', Comment, Triple)
# The pattern actually used by generate_tokens: group(1) is the token text.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

(tokenprog, pseudoprog, single3prog, double3prog) = map(re.compile, (Token, PseudoToken, Single3, Double3))
# Maps an opening quote (with any u/r prefix combination) to the compiled
# pattern that finds its matching terminator; bare one-character prefixes
# map to None so `endprogs[initial] or ...` chains can fall through.
endprogs = {
    "'": re.compile(Single),
    '"': re.compile(Double),
    "'''": single3prog,
    '"""': double3prog,
    "r'''": single3prog,
    'r"""': double3prog,
    "u'''": single3prog,
    'u"""': double3prog,
    "ur'''": single3prog,
    'ur"""': double3prog,
    "R'''": single3prog,
    'R"""': double3prog,
    "U'''": single3prog,
    'U"""': double3prog,
    "uR'''": single3prog,
    'uR"""': double3prog,
    "Ur'''": single3prog,
    'Ur"""': double3prog,
    "UR'''": single3prog,
    'UR"""': double3prog,
    'r': None,
    'R': None,
    'u': None,
    'U': None }

# Membership tables (dicts used as sets -- a pre-set-literal Python idiom)
# listing every legal opener for triple- and single-quoted strings.
triple_quoted = { }
for t in ("'''", '"""', "r'''", 'r"""', "R'''", 'R"""', "u'''", 'u"""', "U'''", 'U"""', "ur'''", 'ur"""', "Ur'''", 'Ur"""', "uR'''", 'uR"""', "UR'''", 'UR"""'):
    triple_quoted[t] = t

single_quoted = { }
for t in ("'", '"', "r'", 'r"', "R'", 'R"', "u'", 'u"', "U'", 'U"', "ur'", 'ur"', "Ur'", 'Ur"', "uR'", 'uR"', "UR'", 'UR"'):
    single_quoted[t] = t

# Number of columns a tab advances to the next multiple of (for indentation).
tabsize = 8
  99.  
class TokenError(Exception):
    """Raised when EOF is reached inside an unterminated multi-line
    string or statement (see generate_tokens)."""
    pass
  102.  
  103.  
class StopTokenizing(Exception):
    """May be raised by a tokeneater callback to abort tokenize() early;
    tokenize() swallows it."""
    pass
  106.  
  107.  
  108. def printtoken(type, token, .2, .3, line):
  109.     (srow, scol) = .2
  110.     (erow, ecol) = .3
  111.     print '%d,%d-%d,%d:\t%s\t%s' % (srow, scol, erow, ecol, tok_name[type], repr(token))
  112.  
  113.  
  114. def tokenize(readline, tokeneater = printtoken):
  115.     
  116.     try:
  117.         tokenize_loop(readline, tokeneater)
  118.     except StopTokenizing:
  119.         pass
  120.  
  121.  
  122.  
  123. def tokenize_loop(readline, tokeneater):
  124.     for token_info in generate_tokens(readline):
  125.         tokeneater(*token_info)
  126.     
  127.  
  128.  
  129. def untokenize(iterable):
  130.     startline = False
  131.     prevstring = False
  132.     indents = []
  133.     toks = []
  134.     toks_append = toks.append
  135.     for tok in iterable:
  136.         (toknum, tokval) = tok[:2]
  137.         if toknum in (NAME, NUMBER):
  138.             tokval += ' '
  139.         
  140.         if toknum == STRING:
  141.             if prevstring:
  142.                 tokval = ' ' + tokval
  143.             
  144.             prevstring = True
  145.         else:
  146.             prevstring = False
  147.         if toknum == INDENT:
  148.             indents.append(tokval)
  149.             continue
  150.         elif toknum == DEDENT:
  151.             indents.pop()
  152.             continue
  153.         elif toknum in (NEWLINE, COMMENT, NL):
  154.             startline = True
  155.         elif startline and indents:
  156.             toks_append(indents[-1])
  157.             startline = False
  158.         
  159.         toks_append(tokval)
  160.     
  161.     return ''.join(toks)
  162.  
  163.  
  164. def generate_tokens(readline):
  165.     lnum = parenlev = continued = 0
  166.     namechars = string.ascii_letters + '_'
  167.     numchars = '0123456789'
  168.     (contstr, needcont) = ('', 0)
  169.     contline = None
  170.     indents = [
  171.         0]
  172.     while None:
  173.         
  174.         try:
  175.             line = readline()
  176.         except StopIteration:
  177.             line = ''
  178.  
  179.         lnum = lnum + 1
  180.         pos = 0
  181.         max = len(line)
  182.         if contstr:
  183.             if not line:
  184.                 raise TokenError, ('EOF in multi-line string', strstart)
  185.             
  186.             endmatch = endprog.match(line)
  187.             if endmatch:
  188.                 pos = end = endmatch.end(0)
  189.                 yield (STRING, contstr + line[:end], strstart, (lnum, end), contline + line)
  190.                 (contstr, needcont) = ('', 0)
  191.                 contline = None
  192.             elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
  193.                 yield (ERRORTOKEN, contstr + line, strstart, (lnum, len(line)), contline)
  194.                 contstr = ''
  195.                 contline = None
  196.                 continue
  197.             else:
  198.                 contstr = contstr + line
  199.                 contline = contline + line
  200.         elif parenlev == 0 and not continued:
  201.             if not line:
  202.                 break
  203.             
  204.             column = 0
  205.             while pos < max:
  206.                 if line[pos] == ' ':
  207.                     column = column + 1
  208.                 elif line[pos] == '\t':
  209.                     column = (column / tabsize + 1) * tabsize
  210.                 elif line[pos] == '\x0c':
  211.                     column = 0
  212.                 else:
  213.                     break
  214.                 pos = pos + 1
  215.             if pos == max:
  216.                 break
  217.             
  218.             if line[pos] in '#\r\n':
  219.                 yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], (lnum, pos), (lnum, len(line)), line)
  220.                 continue
  221.             
  222.             if column > indents[-1]:
  223.                 indents.append(column)
  224.                 yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
  225.             
  226.             while column < indents[-1]:
  227.                 if column not in indents:
  228.                     raise IndentationError('unindent does not match any outer indentation level', ('<tokenize>', lnum, pos, line))
  229.                 
  230.                 indents = indents[:-1]
  231.                 yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
  232.         elif not line:
  233.             raise TokenError, ('EOF in multi-line statement', (lnum, 0))
  234.         
  235.         continued = 0
  236.         while pos < max:
  237.             pseudomatch = pseudoprog.match(line, pos)
  238.             if pseudomatch:
  239.                 (start, end) = pseudomatch.span(1)
  240.                 spos = (lnum, start)
  241.                 epos = (lnum, end)
  242.                 pos = end
  243.                 token = line[start:end]
  244.                 initial = line[start]
  245.                 if (initial in numchars or initial == '.') and token != '.':
  246.                     yield (NUMBER, token, spos, epos, line)
  247.                 elif initial in '\r\n':
  248.                     if not parenlev > 0 or NL:
  249.                         pass
  250.                     yield (NEWLINE, token, spos, epos, line)
  251.                 elif initial == '#':
  252.                     yield (COMMENT, token, spos, epos, line)
  253.                 elif token in triple_quoted:
  254.                     endprog = endprogs[token]
  255.                     endmatch = endprog.match(line, pos)
  256.                     if endmatch:
  257.                         pos = endmatch.end(0)
  258.                         token = line[start:pos]
  259.                         yield (STRING, token, spos, (lnum, pos), line)
  260.                     else:
  261.                         strstart = (lnum, start)
  262.                         contstr = line[start:]
  263.                         contline = line
  264.                         break
  265.                 elif initial in single_quoted and token[:2] in single_quoted or token[:3] in single_quoted:
  266.                     if token[-1] == '\n':
  267.                         strstart = (lnum, start)
  268.                         if not endprogs[initial] and endprogs[token[1]]:
  269.                             pass
  270.                         endprog = endprogs[token[2]]
  271.                         contstr = line[start:]
  272.                         needcont = 1
  273.                         contline = line
  274.                         break
  275.                     else:
  276.                         yield (STRING, token, spos, epos, line)
  277.                 elif initial in namechars:
  278.                     yield (NAME, token, spos, epos, line)
  279.                 elif initial == '\\':
  280.                     continued = 1
  281.                 elif initial in '([{':
  282.                     parenlev = parenlev + 1
  283.                 elif initial in ')]}':
  284.                     parenlev = parenlev - 1
  285.                 
  286.                 yield (OP, token, spos, epos, line)
  287.                 continue
  288.             yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line)
  289.             pos = pos + 1
  290.         continue
  291.         for indent in indents[1:]:
  292.             yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
  293.         
  294.     yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
  295.  
  296.